pacman::p_load(tidyverse,xtable,texreg)
v <- function(x) View(get(x, envir = .GlobalEnv))

capitalize <- function(s) {
  paste0(toupper(substr(s, 1, 1)), substr(s, 2, nchar(s)))
}

setwd("")#set working directory 

#import correlation tables
nh_dems<-read.csv("nh_dems_groupbyyear_corrs.csv")
nh_gop<-read.csv("nh_gop_groupbyyear_corrs.csv")
pa_dems<-read.csv("pa_dems_groupbyyear_corrs.csv")
pa_gop<-read.csv("pa_gop_groupbyyear_corrs.csv")
al_gop<-read.csv("al_groupbyyear_corrs.csv")
oh_gop<-read.csv("oh_gop_groupbyyear_corrs.csv")
oh_dems<-read.csv("oh_dems_groupbyyear_corrs.csv")
ca_gop<-read.csv("ca_gop_groupbyyear_corrs.csv")
ca_dems<-read.csv("ca_dems_groupbyyear_corrs.csv")

#import voteshares
nh_voteshares <- read.csv("nh-voteshares.csv")%>%mutate(state="NH")%>%dplyr::rename(voteshare=voteshares)%>%dplyr::select(-X)
pa_voteshares <- read.csv("pa-voteshares.csv")%>%mutate(state="PA")%>%dplyr::select(-X)
al_voteshares <- read.csv("al-voteshares.csv")%>%mutate(state="AL")%>%dplyr::select(-X)
oh_voteshares <- read.csv("oh-voteshares.csv")%>%mutate(state="OH")%>%dplyr::select(-X)
ca_voteshares <- read.csv("ca-voteshares.csv")%>%mutate(state="CA")%>%dplyr::select(-c(X,V1))%>%dplyr::rename(cand = RowNames)
voteshares <- rbind(oh_voteshares, al_voteshares, pa_voteshares, nh_voteshares, ca_voteshares) %>%
  filter(., voteshare > 0.1)#subset to only candidates who got more than 10% of the vote

#merge and get around coincident senate and presidential elections (dem senate and president, 2016 PA)
corr <- rbind(al_gop, pa_gop, pa_dems, nh_dems, nh_gop, oh_dems, oh_gop, ca_gop, ca_dems)
corr$year_cand1.sen.pres <- corr$year_cand1
corr$year_cand2.sen.pres <- corr$year_cand2

#distinguish senate and presidential campaigns that took place in the same year
for (i in 1:length(corr[,1])){
  if (corr$variable_2[i] %in% c("bolduc.pct.20", "messner.pct.20")){
    corr$year_cand1.sen.pres[i] <- 2020.5}
  if (corr$variable[i] %in% c("bolduc.pct.20", "messner.pct.20")){
    corr$year_cand2.sen.pres[i] <- 2020.5}
  if (corr$variable_2[i] %in% c("SHELBY.SENPCT16", "MCCONNEL.SENPCT16")){
    corr$year_cand1.sen.pres[i] <- 2016.5}
  if (corr$variable[i] %in% c("SHELBY.SENPCT16", "MCCONNEL.SENPCT16")){
    corr$year_cand2.sen.pres[i] <- 2016.5}
  if (corr$variable_2[i] %in% c("sestak.16.pct", "fetterman.16.pct", "mcginty.16.pct")){
    corr$year_cand1.sen.pres[i] <- 2016.5}
  if (corr$variable[i] %in% c("sestak.16.pct", "fetterman.16.pct", "mcginty.16.pct")){
    corr$year_cand2.sen.pres[i] <- 2016.5}
  if (corr$variable_2[i] %in% c("moreno.2024.pct", "larose.2024.pct", "dolan.2024.pct")){
    corr$year_cand1.sen.pres[i] <- 2024.5}
  if (corr$variable[i] %in% c("moreno.2024.pct", "larose.2024.pct", "dolan.2024.pct")){
    corr$year_cand2.sen.pres[i] <- 2024.5}
}

#Fix Dems title (inconsistency in labeling)
corr$party[corr$party=='DEM'] <- 'DEMS'

#Clean suffix strings
corr$variable_2 <-gsub("PCT16","",as.character(corr$variable_2))
corr$variable <-gsub("PCT16","",as.character(corr$variable))
corr$variable_2 <-gsub("PCT22","",as.character(corr$variable_2))
corr$variable <-gsub("PCT22","",as.character(corr$variable))
corr$variable_2 <-gsub("PCT20","",as.character(corr$variable_2))
corr$variable <-gsub("PCT20","",as.character(corr$variable))
corr$variable_2 <-gsub("PCT17","",as.character(corr$variable_2))
corr$variable <-gsub("PCT17","",as.character(corr$variable))
corr$variable_2 <-gsub(".12.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".12.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".16.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".16.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".20.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".20.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".10.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".10.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".22.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".22.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".24.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".24.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".08.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".08.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".18.pct","",as.character(corr$variable_2))
corr$variable <-gsub(".18.pct","",as.character(corr$variable))
corr$variable_2 <-gsub(".r_GOP_Senate_2022","",as.character(corr$variable_2))
corr$variable <-gsub(",.r_GOP_Senate_2022","",as.character(corr$variable))
corr$variable_2 <-gsub("_GOP24_COUNT_GOP_President_2024","",as.character(corr$variable_2))
corr$variable <-gsub("_GOP24_COUNT_GOP_President_2024","",as.character(corr$variable))
corr$variable_2 <-gsub("r_GOP_Senate_2010","",as.character(corr$variable_2))
corr$variable <-gsub("r_GOP_Senate_2010","",as.character(corr$variable))
corr$variable_2 <-gsub("..r_GOP_President_2008","",as.character(corr$variable_2))
corr$variable <-gsub("..r_GOP_President_2008","",as.character(corr$variable))
corr$variable_2 <-gsub("_GOP12_PRESIDENT","",as.character(corr$variable_2))
corr$variable <-gsub("_GOP12_PRESIDENT","",as.character(corr$variable))
corr$variable_2 <-gsub("..d_DEMS_President_2008","",as.character(corr$variable_2))
corr$variable <-gsub("..d_DEMS_President_2008","",as.character(corr$variable))
corr$variable_2 <-gsub(", r_GOP_President_2016","",as.character(corr$variable_2))
corr$variable <-gsub(", r_GOP_President_2016","",as.character(corr$variable))
corr$variable_2 <-gsub("_GOP_Senate_2020","",as.character(corr$variable_2))
corr$variable <-gsub("_GOP_Senate_2020","",as.character(corr$variable))
corr$variable <-gsub(", d_DEMS_President_2016","",as.character(corr$variable))
corr$variable <-gsub("_DEMS_President_2020","",as.character(corr$variable))
corr$variable <- tolower(corr$variable)
corr$variable <- gsub("\\.", "", corr$variable)
corr$variable <- gsub("\\,", "", corr$variable)
corr$variable_2 <- tolower(corr$variable_2)
corr$variable_2 <- gsub("\\.", "", corr$variable_2)
corr$variable_2 <- gsub("\\,", "", corr$variable_2)
unique(corr$variable_2)
corr$variable_2[corr$variable_2 == 'donaldcbolduc'] <- 'bolduc'
corr$variable_2[corr$variable_2 == 'ovidelamontagne'] <- 'lamontagne'
corr$variable_2[corr$variable_2 == 'kellyayotte'] <- 'ayotte'
corr$variable_2[corr$variable_2 == 'billbinnie'] <- 'binnie'
corr$variable_2[corr$variable_2 == 'kevinsmith'] <- 'smith'
corr$variable_2[corr$variable_2 == 'chuckmorse'] <- 'morse'
corr$variable[corr$variable == 'donaldcbolduc'] <- 'bolduc'
corr$variable[corr$variable == 'ovidelamontagne'] <- 'lamontagne'
corr$variable[corr$variable == 'kellyayotte'] <- 'ayotte'
corr$variable[corr$variable == 'billbinnie'] <- 'binnie'
corr$variable[corr$variable == 'kevinsmith'] <- 'smith'
corr$variable[corr$variable == 'chuckmorse'] <- 'morse'
corr$variable_2 <- gsub("2", "", corr$variable_2)
corr$variable <- gsub("2", "", corr$variable)
corr$variable_2 <- gsub("pct", "", corr$variable_2)
corr$variable <- gsub("pct", "", corr$variable)
corr$variable_2 <- gsub("[0-9]", "", corr$variable_2)
corr$variable <- gsub("[0-9]", "", corr$variable)
corr$variable_2 <- gsub("_", "", corr$variable_2)
corr$variable <- gsub("_", "", corr$variable)
corr$variable_2 <- gsub("divdem", "", corr$variable_2)
corr$variable <- gsub("divdem", "", corr$variable)
corr$variable_2 <- gsub("divrep", "", corr$variable_2)
corr$variable <- gsub("divrep", "", corr$variable)
corr$variable_2 <- gsub("michaelrbloomberg", "bloomberg", corr$variable_2)
corr$variable <- gsub("michaelrbloomberg", "bloomberg", corr$variable)
corr$variable_2 <- gsub("josephrbiden", "biden", corr$variable_2)
corr$variable <- gsub("josephrbiden", "biden", corr$variable)
corr$variable_2 <- gsub("johnrkasich", "kasich", corr$variable_2)
corr$variable <- gsub("johnrkasich", "kasich", corr$variable)
corr$variable_2 <- gsub("johnmccain", "mccain", corr$variable_2)
corr$variable <- gsub("johnmccain", "mccain", corr$variable)
corr$variable_2 <- gsub("hillaryclinton", "clinton", corr$variable_2)
corr$variable <- gsub("hillaryclinton", "clinton", corr$variable)
corr$variable_2 <- gsub("elizabethwarren", "warren", corr$variable_2)
corr$variable <- gsub("elizabethwarren", "warren", corr$variable)
corr$variable_2 <- gsub("donaldtrump", "trump", corr$variable_2)
corr$variable <- gsub("donaldtrump", "trump", corr$variable)
corr$variable_2 <- gsub("donaldjtrump", "trump", corr$variable_2)
corr$variable <- gsub("donaldjtrump", "trump", corr$variable)
corr$variable_2 <- gsub("berniesanders", "sanders", corr$variable_2)
corr$variable <- gsub("berniesanders", "sanders", corr$variable)
corr$variable_2 <- gsub("mittromney", "romney", corr$variable_2)
corr$variable <- gsub("mittromney", "romney", corr$variable)
corr$variable_2 <- gsub("trump.*", "trump", corr$variable_2)
corr$variable <- gsub("trump.*", "trump", corr$variable)
corr$variable_2 <- gsub("barackobama", "obama", corr$variable_2)
corr$variable <- gsub("barackobama", "obama", corr$variable)
corr$variable_2 <- gsub("mikehuckabee", "huckabee", corr$variable_2)
corr$variable <- gsub("mikehuckabee", "huckabee", corr$variable)
corr$variable_2 <- gsub("mcconnelsen", "mcconnell", corr$variable_2)
corr$variable <- gsub("mcconnelsen", "mcconnell", corr$variable)
corr$variable_2 <- gsub("shelbysen", "shelby", corr$variable_2)
corr$variable <- gsub("shelbysen", "shelby", corr$variable)
corr$variable_2 <- gsub("buttgieg", "buttigieg", corr$variable_2)
corr$variable <- gsub("buttgieg", "buttigieg", corr$variable)
corr$variable_2 <- gsub("ronpaul", "paul", corr$variable_2)
corr$variable <- gsub("ronpaul", "paul", corr$variable)

#clear voteshare dataset
voteshares$cand <-gsub("PCT16","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT16","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT22","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT22","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT20","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT20","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT17","",as.character(voteshares$cand))
voteshares$cand <-gsub("PCT17","",as.character(voteshares$cand))
voteshares$cand <-gsub(".12.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".12.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".16.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".16.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".20.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".20.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".10.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".10.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".22.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".22.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".24.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".24.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".08.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".08.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".18.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".18.pct","",as.character(voteshares$cand))
voteshares$cand <-gsub(".r_GOP_Senate_2022","",as.character(voteshares$cand))
voteshares$cand <-gsub(",.r_GOP_Senate_2022","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP24_COUNT_GOP_President_2024","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP24_COUNT_GOP_President_2024","",as.character(voteshares$cand))
voteshares$cand <-gsub("r_GOP_Senate_2010","",as.character(voteshares$cand))
voteshares$cand <-gsub("r_GOP_Senate_2010","",as.character(voteshares$cand))
voteshares$cand <-gsub("..r_GOP_President_2008","",as.character(voteshares$cand))
voteshares$cand <-gsub("..r_GOP_President_2008","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP12_PRESIDENT","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP12_PRESIDENT","",as.character(voteshares$cand))
voteshares$cand <-gsub("..d_DEMS_President_2008","",as.character(voteshares$cand))
voteshares$cand <-gsub("..d_DEMS_President_2008","",as.character(voteshares$cand))
voteshares$cand <-gsub(", r_GOP_President_2016","",as.character(voteshares$cand))
voteshares$cand <-gsub(", r_GOP_President_2016","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP_Senate_2020","",as.character(voteshares$cand))
voteshares$cand <-gsub("_GOP_Senate_2020","",as.character(voteshares$cand))
voteshares$cand <-gsub(", d_DEMS_President_2016","",as.character(voteshares$cand))
voteshares$cand <-gsub("_DEMS_President_2020","",as.character(voteshares$cand))
voteshares$cand <- tolower(voteshares$cand)
voteshares$cand <- gsub("\\.", "", voteshares$cand)
voteshares$cand <- gsub("\\,", "", voteshares$cand)
voteshares$cand <- tolower(voteshares$cand)
voteshares$cand <- gsub("\\.", "", voteshares$cand)
voteshares$cand <- gsub("\\,", "", voteshares$cand)
unique(voteshares$cand)
voteshares$cand[voteshares$cand == 'donaldcbolduc'] <- 'bolduc'
voteshares$cand[voteshares$cand == 'ovidelamontagne'] <- 'lamontagne'
voteshares$cand[voteshares$cand == 'kellyayotte'] <- 'ayotte'
voteshares$cand[voteshares$cand == 'billbinnie'] <- 'binnie'
voteshares$cand[voteshares$cand == 'kevinsmith'] <- 'smith'
voteshares$cand[voteshares$cand == 'chuckmorse'] <- 'morse'
voteshares$cand[voteshares$cand == 'donaldcbolduc'] <- 'bolduc'
voteshares$cand[voteshares$cand == 'ovidelamontagne'] <- 'lamontagne'
voteshares$cand[voteshares$cand == 'ovidelamontagner'] <- 'lamontagne'
voteshares$cand[voteshares$cand == 'kellyayotte'] <- 'ayotte'
voteshares$cand[voteshares$cand == 'billbinnie'] <- 'binnie'
voteshares$cand[voteshares$cand == 'kevinsmith'] <- 'smith'
voteshares$cand[voteshares$cand == 'chuckmorse'] <- 'morse'
voteshares$cand[voteshares$cand == 'franklaroser'] <- 'larose'
voteshares$cand[voteshares$cand == 'mattdolanr'] <- 'dolan'
voteshares$cand[voteshares$cand == 'berniemorenor'] <- 'moreno'
voteshares$cand[voteshares$cand == 'president(r)-gingrichnewt'] <- 'gingrich'
voteshares$cand[voteshares$cand == 'tedcruz(r)'] <- 'cruz'
voteshares$cand[voteshares$cand == 'donaldjtrumpr'] <- 'trump'
voteshares$cand[voteshares$cand == 'nikkirhaleyr'] <- 'haley'
voteshares$cand[voteshares$cand == 'president(r)-romneymitt'] <- 'romney'
voteshares$cand[voteshares$cand == 'president(r)-santorumrick'] <- 'santorum'
voteshares$cand[voteshares$cand == 'johnrkasich(r)'] <- 'kasich'
voteshares$cand[voteshares$cand == 'donaldjtrump(r)'] <- 'trump'
voteshares$cand[voteshares$cand == 'melissaackison(r)'] <- 'ackison'
voteshares$cand[voteshares$cand == 'mikegibbons(r)'] <- 'gibbons'
voteshares$cand[voteshares$cand == 'jimrenacci(r)'] <- 'renacci'
voteshares$cand[voteshares$cand == 'ussenate(s)-daniellabotz'] <- 'labotz'
voteshares$cand[voteshares$cand == 'ussenate(c)-ericdeaton'] <- 'deaton'
voteshares$cand[voteshares$cand == 'ussenate(d)-jenniferbrunner'] <- 'brunner'
voteshares$cand[voteshares$cand == 'hillaryclinton(d)'] <- 'clinton'
voteshares$cand[voteshares$cand == 'berniesanders(d)'] <- 'sanders'
voteshares$cand[voteshares$cand == 'josephrbidenjr(d)'] <- 'biden'
voteshares$cand[voteshares$cand == 'ussendemwillboyd'] <- 'boyd'
voteshares$cand[voteshares$cand == 'ussenrepkatiebritt'] <- 'britt'
voteshares$cand[voteshares$cand == 'ussenrepmobrooks'] <- 'brooks'
voteshares$cand[voteshares$cand == 'ussenrepmikedurant'] <- 'durant'
voteshares$cand[voteshares$cand == 'ussen0bradleybyrne'] <- 'byrne'
voteshares$cand[voteshares$cand == 'ussen0jeffsessions'] <- 'sessions'
voteshares$cand[voteshares$cand == 'ussen0tommytuberville'] <- 'tuberville'
voteshares$cand[voteshares$cand == 'ussen17mobrooks'] <- 'brooks'
voteshares$cand[voteshares$cand == 'ussen17roysmoore'] <- 'moore'
voteshares$cand[voteshares$cand == 'ussen17lutherstrange'] <- 'strange'
voteshares$cand[voteshares$cand == 'pres16bencarson'] <- 'carson'
voteshares$cand[voteshares$cand == 'pres16donaldjtrump'] <- 'trump'
voteshares$cand[voteshares$cand == 'pres16marcorubio'] <- 'rubio'
voteshares$cand[voteshares$cand == 'pres16tedcruz'] <- 'cruz'
voteshares$cand <- gsub("2", "", voteshares$cand)
voteshares$cand <- gsub("2", "", voteshares$cand)
voteshares$cand <- gsub("2", "", voteshares$cand)
voteshares$cand[voteshares$cand == 'ussendemwillboyd'] <- 'boyd'
voteshares$cand[voteshares$cand == 'ussenrepkatiebritt'] <- 'britt'
voteshares$cand[voteshares$cand == 'ussenrepmobrooks'] <- 'brooks'
voteshares$cand[voteshares$cand == 'ussenrepmikedurant'] <- 'durant'
voteshares$cand[voteshares$cand == 'ussen0bradleybyrne'] <- 'byrne'
voteshares$cand[voteshares$cand == 'ussen0jeffsessions'] <- 'sessions'
voteshares$cand[voteshares$cand == 'ussen0tommytuberville'] <- 'tuberville'
voteshares$cand <- ifelse(str_detect(voteshares$cand, "_.*_"), 
                          str_extract(voteshares$cand, "(?<=_)[^_]+(?=_)"), 
                          voteshares$cand)
voteshares <- voteshares %>%
  mutate(cand = cand %>%
           gsub("donaldjtrump", "trump", .) %>%
           gsub("josephrbiden", "biden", .) %>%
           gsub("michaelrbloomberg", "bloomberg", .) %>%
           gsub("berniesanders", "sanders", .) %>%
           gsub("elizabethwarren", "warren", .) %>%
           gsub("johnrkasich", "kasich", .) %>%
           gsub("donaldtrump", "trump", .) %>%
           gsub("hillaryclinton", "clinton", .) %>%
           gsub("mittromney", "romney", .) %>%
           gsub("ronpaul", "paul", .) %>%
           gsub("barackobama", "obama", .) %>%
           gsub("mikehuckabee", "huckabee", .) %>%
           gsub("johnmccain", "mccain", .) %>%
           gsub("^trump.*", "trump", .) %>%
           gsub("ussen16jonathanmcconnell", "mcconnell", .) %>%
           gsub("ussen16richardcshelby", "shelby", .)%>%
           gsub("ussen16richardcshelby", "shelby", .)%>%
           gsub("romneyr", "romney", .)%>%
           gsub("sanders d", "sanders", .)%>%
           gsub("sandersd", "sanders", .)%>%
           gsub("kasich r", "kasich", .)%>%
           gsub("cruz r", "cruz", .)%>%
           gsub("rubio r", "rubio", .)%>%
           gsub("edwardsd", "edwards", .)%>%
           gsub("clintond", "clinton", .)%>%
           gsub("chuckmorser", "morse", .)%>%
           gsub("buttigiegd", "buttigieg", .)%>%
           gsub("huckabeer", "huckabee", .)%>%
           gsub("kevinsmithr", "smith", .)%>%
           gsub("klobuchard", "klobuchar", .)%>%
           gsub("mccainr", "mccain", .)%>%
           gsub("obamad", "obama", .)%>%
           gsub("billbinnier", "binnie", .)%>%
           gsub("billbinnier", "binnie", .)%>%
           gsub("clinton d", "clinton", .)%>%
           gsub("bush r", "bush", .)%>%
           gsub("donaldcbolducr", "bolduc", .)%>%
           gsub("kellyayotter", "ayotte", .))


# merge dime scores with correlation table. Download: https://data.stanford.edu/dime
#Bonica, Adam. 2024. Database on Ideology, Money in Politics, and Elections: Public version 4.0 [Computer file]. Stanford, CA: Stanford University Libraries. https://data.stanford.edu/dime.
dime<-read.csv("dime_recipients_1979_2024.csv")%>%
  filter(lname%in%corr$variable_2)%>%
  filter(fecyear>2007)%>%
  filter(grepl("fd", election))%>%
  filter(state%in%c("00", "AL", "PA", "CA", "NH", "OH"))%>%
  filter(seat%in%c("federal:senate", "federal:president"))%>%
  mutate(year=cycle)%>%
  #manually excluding irrelevant candidates
  filter(name!="bush, willita d")%>%
  filter(name!="cruz, erin")%>%
  filter(name!="cruz, israel")%>%
  filter(name!="moore, david")%>%
  filter(name!="smith, charles e")%>%
  mutate(party=as.character(party)%>%
           dplyr::recode(
             "100"="DEMS",
             "200"="GOP"
           ))%>%
  #dealing with AL special election (coded as 2018, took place in 2017)
  mutate(year=ifelse(lname=="strange"&year==2018,2017,year))%>%
  mutate(year=ifelse(lname=="moore"&year==2018,2017,year))%>%
  mutate(year=ifelse(lname=="brooks"&year==2018,2017,year))%>%
  mutate(nameyear=paste0(lname,year))%>%
  filter(nameyear%in%paste0(corr$variable_2,corr$year_cand1))

#clean presidential nominee duplicates
dime.nominee<-dime%>%
  filter(grepl("nominee", bonica.rid))%>%
  mutate(nameyear=paste0(lname,year))
  
dime<-dime%>%
  filter(!nameyear%in%dime.nominee$nameyear)%>%
  rbind(dime.nominee)%>%
  dplyr::select(lname, year, party, recipient.cfscore, recipient.cfscore.dyn)

tableA14<-dime%>%
  mutate(party=party%>%dplyr::recode(
    "GOP"="Republicans", 
    "DEMS"="Democrats"
  ))%>%
  mutate(lname=capitalize(lname))%>%
  arrange(party, year,recipient.cfscore.dyn)%>%
  dplyr::select(party, year, lname, everything())%>%
  setNames(c("Party", "Year", "Candidate", "CF Score", "CF Score (Dynamic)"))

print(xtable(tableA14, 
             label="t:dime", 
             caption=c("DIME scores for candidates in competitive senate primary elections in Alabama, Ohio, New Hampshire and Pennsylvania, as well as presidential candidates, from 2008 to 2024."),
             digits=c(rep(0,3),rep(3,3))),
      include.rownames=F,
      scalebox=0.7,
      file=c("tableA14.tex"))#table A14

dime.dem <- dime%>%
  filter(party=="DEMS")
round(sd(dime.dem$recipient.cfscore.dyn,na.rm=T),3)#0.24
dime.gop <- dime%>%
  filter(party=="GOP")
round(sd(dime.gop$recipient.cfscore.dyn,na.rm=T),3)#0.275

unique(corr$variable_2[!corr$variable_2%in%c(dime$lname)])#NOTE: Nikki Haley's 2024 campaign doesn't appear in the data

#merge for first candidate
merged.corr.dime <- merge(corr, dime,
                  by.y=c("lname","year", "party"), by.x=c("variable_2", "year_cand1", "party"),
                  all.x=T)

colnames(merged.corr.dime)[colnames(merged.corr.dime) == "recipient.cfscore"] ="cfscore_cand1"
colnames(merged.corr.dime)[colnames(merged.corr.dime) == "recipient.cfscore.dyn"] ="cfscore.dyn_cand1"

#merge for second candidate
merged.corr.dime.2 <- merge(merged.corr.dime, dime,
                          by.y=c("lname","year", "party"), by.x=c("variable", "year_cand2","party"),
                          all.x=T)

colnames(merged.corr.dime.2)[colnames(merged.corr.dime.2) == "recipient.cfscore"] ="cfscore_cand2"
colnames(merged.corr.dime.2)[colnames(merged.corr.dime.2) == "recipient.cfscore.dyn"] ="cfscore.dyn_cand2"

colnames(merged.corr.dime.2)[colnames(merged.corr.dime.2) == "variable"] ="cand2"
colnames(merged.corr.dime.2)[colnames(merged.corr.dime.2) == "variable_2"] ="cand1"
colnames(merged.corr.dime.2)[colnames(merged.corr.dime.2) == "value"] ="corr"

#create dime score/iscap ideology diff variable
merged.corr.dime.2$cfscore.diff <- abs(merged.corr.dime.2$cfscore.dyn_cand1 - merged.corr.dime.2$cfscore.dyn_cand2)

#vector of presidential candidates
pres.list <- c("obama", "haley","edwards","gingrich","romney","cruz", "trump","santorum","kasich", "huntsman", "rubio","mccain", "paul",
               "huckabee", "bush", "sanders", "biden", "clinton")

merged.corr.dime.2$office1 <- ifelse(merged.corr.dime.2$cand1 %in% pres.list, "pres", "sen")
merged.corr.dime.2$office2 <- ifelse(merged.corr.dime.2$cand2 %in% pres.list, "pres", "sen")

## Add voteshares
#first cand
colnames(voteshares)[1] <- "voteshare1"
colnames(voteshares)[2] <- "cand1"
colnames(voteshares)[3] <- "year_cand1"
merged.corr.dime.2.voteshares  <- merged.corr.dime.2 %>% 
  left_join(voteshares, by=c("cand1", "year_cand1", "state"))

#second cand
colnames(voteshares)[1] <- "voteshare2"
colnames(voteshares)[2] <- "cand2"
colnames(voteshares)[3] <- "year_cand2"

merged.corr.dime.2.voteshares.2  <- merged.corr.dime.2.voteshares %>% 
  left_join(voteshares, by=c("cand2", "year_cand2", "state"))

#remove duplicates 
#create a unique string per combination of party, state, and candidates 
merged.2.voteshares.2.nodups<-merged.corr.dime.2.voteshares.2%>%
  mutate(
    candyear1id = paste0(party, state, cand1, year_cand1),
    candyear2id = paste0(party, state,cand2, year_cand2),
    pair = map2_chr(candyear1id, candyear2id, ~ paste(sort(c(.x, .y)), collapse = "_"))
  )%>%
  distinct(pair, .keep_all = TRUE) %>%
  dplyr::select(-pair)

### subset to cross-year comparisons
merged.2.voteshares.2.nodups.3 <- merged.2.voteshares.2.nodups[!merged.2.voteshares.2.nodups$year_cand1==merged.2.voteshares.2.nodups$year_cand2,]
merged.2.voteshares.2.nodups.3$same_office <- 1*(merged.2.voteshares.2.nodups.3$office1==merged.2.voteshares.2.nodups.3$office2)
merged.2.voteshares.2.nodups.3$year_diff <- abs(merged.2.voteshares.2.nodups.3$year_cand2-merged.2.voteshares.2.nodups.3$year_cand1)
merged.2.voteshares.2.nodups.3$same_candidate <- 1*(merged.2.voteshares.2.nodups.3$cand1==merged.2.voteshares.2.nodups.3$cand2)
merged.2.voteshares.2.nodups.3$year_comb <- paste0(merged.2.voteshares.2.nodups.3$year_cand1, "-",merged.2.voteshares.2.nodups.3$year_cand2)
merged.2.voteshares.2.nodups.3$party <- merged.2.voteshares.2.nodups.3$party %>% dplyr::recode(
  "reps" = "GOP" #fixing one of the party names
)
merged.2.voteshares.2.nodups.3%>%
  filter(merged.2.voteshares.2.nodups.3$cand1==merged.2.voteshares.2.nodups.3$cand2)%>%
  dplyr::select(corr)%>%
  mutate(high=as.numeric(corr>0.5))%>%
  group_by(high)%>%
  dplyr::summarise(count=n()/nrow(.))#38% of same candidates over 0.5 corr

lout1 <- lm(corr~ as.factor(state)+as.factor(party)+as.factor(year_comb)+year_diff+same_office+same_candidate+as.factor(office1),data=merged.2.voteshares.2.nodups.3)
lout2 <- lm(corr~ as.factor(state)+as.factor(party)+as.factor(year_comb)+year_diff+same_office+same_candidate+as.factor(office1)+
              voteshare1,data=merged.2.voteshares.2.nodups.3)
lout3 <- lm(corr~ cfscore.diff+as.factor(state)+as.factor(party)+as.factor(year_comb)+year_diff+same_office+as.factor(office1),data=merged.2.voteshares.2.nodups.3)
lout4 <- lm(corr~ cfscore.diff+as.factor(state)+as.factor(party)+as.factor(year_comb)+year_diff+same_office+as.factor(office1)+
              voteshare1,data=merged.2.voteshares.2.nodups.3)
lout5 <- lm(corr~ cfscore.diff+as.factor(state)+as.factor(party)+as.factor(year_cand1.sen.pres)+year_diff+same_office+as.factor(office1)+
              voteshare1,data=merged.2.voteshares.2.nodups.3)

round(sd(merged.2.voteshares.2.nodups.3$cfscore.diff,na.rm=T)*as.numeric(lout3$coefficients[2]),2)#-0.06 for standard deviation increase

#table 4
texreg(list(lout1,lout2,lout3,lout4,lout5),
       digits=3,
       stars=0.05,
       label="t:joint:corr", 
       caption = "OLS regression analyses of cross-year, within-state, within-party correlations between candidates in contested primaries, 2008-2024. The omitted state is Alabama. The omitted party is Democratic; the omitted year is 2008.",
       file="table4.tex")

table4 <- readLines("table4.tex")
table4 <- gsub("as\\.factor\\(party\\)", "", table4)
table4 <- gsub("as\\.factor\\(state\\)", "", table4)
table4 <- gsub("as\\.factor\\(office1\\)sen", "Senate", table4)
table4 <- gsub("cfscore.diff", "DIME Dyn. Score Diff.", table4)
table4 <- gsub("voteshare1", "Vote Share, Cand. 1", table4)
table4 <- gsub("same\\\\_candidate", "Same Cand.", table4)
table4 <- gsub("same\\\\_office", "Same Office", table4)
table4 <- gsub("as\\.factor\\(year\\\\_comb\\)", "", table4)
table4 <- gsub("as\\.factor\\(year\\\\_cand1\\.sen\\.pres\\)(\\d{4})", "\\1", table4)
table4 <- gsub("2016.5", "2016 (Senate)", table4)
table4 <- gsub("2024.5", "2024 (Senate)", table4)
table4 <- gsub("year\\\\_diff", "Year Diff.", table4)
table4 <- gsub("Num\\. obs\\.", "N", table4)

#remove year-pair fixed effects
table4.cl<-table4[-(20:121)]
table4.cl<-table4.cl[-(56)]
table4.cl <- append(table4.cl, "\\scalebox{0.8}{", after = 3)
table4.cl <- append(table4.cl, "}", after = 60)
writeLines(table4.cl, "table4.tex")#table 4

unique(c(paste(merged.2.voteshares.2.nodups.3$cand2, merged.2.voteshares.2.nodups.3$year_cand2),paste(merged.2.voteshares.2.nodups.3$cand1, merged.2.voteshares.2.nodups.3$year_cand1)))#66 unique candidacies

